/**
 *  Copyright 2013 University Pierre & Marie Curie - UMR CNRS 7606 (LIP6/MoVe)
 *  All rights reserved.   This program and the accompanying materials
 *  are made available under the terms of the Eclipse Public License v1.0
 *  which accompanies this distribution, and is available at
 *  http://www.eclipse.org/legal/epl-v10.html
 *
 *  Initial contributor:
 *    Lom M. Hillah - <lom-messan.hillah@lip6.fr>
 *
 *  Mailing list:
 *    lom-messan.hillah@lip6.fr
 */
package fr.lip6.msr4j.utils.parsers;

import java.io.File;
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;

import fr.lip6.msr4j.utils.config.MSR4JLogger;

/**
 * Simple HTML page parser, relying on Jsoup to extract data.
 * <p>
 * This base class only provides helpers to load a {@link Document}, either
 * from the local file system or from a remote URL. Concrete subclasses
 * implement {@link #parseIndex()} to turn the loaded page(s) into their own
 * data structure.
 * @author lom
 *
 * @param <T> the complex type structuring the data you collected from the page(s).
 */
public abstract class HTMLPageParser <T> {
	/** Logger named after the concrete parser class, available to subclasses. */
	protected final Logger logger;
	
	/**
	 * Creates the parser and sets up a logger named after the runtime class
	 * of this instance.
	 */
	public HTMLPageParser() {
		final Class<?> concreteType = this.getClass();
		String loggerName = concreteType.getCanonicalName();
		if (loggerName == null) {
			// Anonymous and local subclasses have no canonical name; fall back
			// to the binary name so that a null name is never handed to the
			// logger factory.
			loggerName = concreteType.getName();
		}
		logger = MSR4JLogger.getLogger(loggerName);
	}
	
	/**
	 * Loads a HTML document from the local file system, providing the base URI
	 * that document originally comes from.
	 * @param indexLocalUrl path of the HTML file on the local file system;
	 *        despite its name it is used as a file path (it is passed to
	 *        {@link File#File(String)}), not as a URL.
	 * @param indexBaseUrl base URI handed to Jsoup for the parsed document.
	 * @param encoding name of the character set handed to Jsoup to read the file.
	 * @return the document Jsoup parsed from the file.
	 * @throws IOException if Jsoup fails to read the file.
	 */
	public Document loadDocumentFromLocal(String indexLocalUrl,
			String indexBaseUrl, String encoding) throws IOException {
		final File localPage = new File(indexLocalUrl);
		final Document parsedPage = Jsoup.parse(localPage, encoding, indexBaseUrl);
		return parsedPage;
	}
	
	/**
	 * Loads a document from a remote URL, using a Jsoup connection.
	 * @param remoteURL the URL of the page to fetch.
	 * @return the document Jsoup obtained from that URL.
	 * @throws IOException if fetching the page fails.
	 */
	public Document loadDocumentFromRemote(String remoteURL) throws IOException {
		return Jsoup.connect(remoteURL).get();
	}
	
	/**
	 * Returns the object of the complex type structuring the data you collected.
	 * What gets parsed, and from which document, is entirely up to the
	 * concrete subclass.
	 * @return the data extracted by the concrete parser.
	 */
	public abstract T parseIndex();
	
}
